library(ggplot2)
  library(reshape2)
  library(dplyr)
  library(tidyr)
  library(GGally)
  library(grid)
  "%&%" = function(a,b) paste(a,b,sep="")
  source('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan-Paper/scripts/Heather/make-figures/multiplot.R')
  my.dir <- '/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/'
  fig.dir <- '~/GitHub/GenArch/GenArchPaper/Figures/'

Fig1

DGN-WB joint heritability. Local h2 is estimated with SNPs within 1 Mb of each gene. Global h2 is estimated with SNPs that are eQTLs in the Framingham Heart Study on other chromosomes (FDR < 0.05).

otherfile<-my.dir %&% 'expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.all.Chr1-22_globalOtherChr.2015-03-18.txt'

fdrother<-read.table(otherfile,header=T) ##FHS eQTLs w/fdr<0.05 on non-gene chromosomes used to define global GRM

##Plot FDR based results
a<-ggplot(fdrother,aes(x=loc.jt.h2,y=glo.jt.h2)) + geom_point(cex=0.8) + geom_abline(intercept=1, slope=-1,color='red') + xlab(expression("local h"^2)) + ylab(expression("global h"^2)) + coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05)) + theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"))

##plot joint h2 estimates
local <- fdrother %>% select(loc.jt.h2,loc.jt.se) %>% arrange(loc.jt.h2) 
names(local) = c('h2','se')
data <- local %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) )
data <- data[complete.cases(data),]
cigt0 <- data$ymin>0
table(cigt0)
## cigt0
## FALSE  TRUE 
##  4974  5989
ptrue<-round(table(cigt0)/sum(table(cigt0)),3)*100
ptrue
## cigt0
## FALSE  TRUE 
##  45.4  54.6
meanh2<-round(mean(data$h2),3)
meanh2
## [1] 0.13
data <- mutate(data,nonzeroCI=cigt0,position=1:nrow(data))

my_grob = grobTree(textGrob(substitute(paste("mean ", h^2, " = ", m),list(m=meanh2)), x=0.05,  y=0.60, hjust=0,gp=gpar(fontsize=14)))
my_grob2 = grobTree(textGrob(substitute(paste("percent TRUE = ", m),list(m=ptrue[2])), x=0.05,  y=0.66, hjust=0,gp=gpar(fontsize=14)))

b<-ggplot(data,aes(x=position,y=h2,ymin=ymin, ymax=ymax, color=nonzeroCI) ) + geom_pointrange(col='gray')+geom_point()+ylab(expression("local h"^2)) + xlab(expression("genes ordered by local h"^2))+coord_cartesian(ylim=c(-0.05,1.05))+theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),legend.justification=c(0,1),legend.position=c(0,1))+annotation_custom(my_grob)+annotation_custom(my_grob2)

global <- fdrother %>% select(glo.jt.h2,glo.jt.se) %>% arrange(glo.jt.h2) 
names(global) = c('h2','se')
data <- global %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) )
data <- data[complete.cases(data),]
cigt0 <- data$ymin>0
table(cigt0)
## cigt0
## FALSE  TRUE 
## 10505   458
ptrue<-round(table(cigt0)/sum(table(cigt0)),3)*100
ptrue
## cigt0
## FALSE  TRUE 
##  95.8   4.2
meanh2<-round(mean(data$h2),3)
meanh2
## [1] 0.076
data <- mutate(data,nonzeroCI=cigt0,position=1:nrow(data))

my_grob = grobTree(textGrob(substitute(paste("mean ", h^2, " = ", m),list(m=meanh2)), x=0.05,  y=0.60, hjust=0,gp=gpar(fontsize=14)))
my_grob2 = grobTree(textGrob(substitute(paste("percent TRUE = ", m),list(m=ptrue[2])), x=0.05,  y=0.66, hjust=0,gp=gpar(fontsize=14)))

c<-ggplot(data,aes(x=position,y=h2,ymin=ymin, ymax=ymax, color=nonzeroCI) ) + geom_pointrange(col='gray')+geom_point()+ylab(expression("global h"^2)) + xlab(expression("genes ordered by global h"^2))+coord_cartesian(ylim=c(-0.05,1.05))+theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),legend.justification=c(0,1),legend.position=c(0,1))+annotation_custom(my_grob)+annotation_custom(my_grob2)

multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)

tiff(filename=fig.dir %&% "Fig1.tiff",width=360,height=960)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
dev.off()
## quartz_off_screen 
##                 2
png(filename=fig.dir %&% "Fig1.png",width=360,height=960)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
dev.off()
## quartz_off_screen 
##                 2

DGN-WB marginal heritability. Local h2 is estimated with SNPs within 1 Mb of each gene. Global h2 is estimated with SNPs that are eQTLs in the Framingham Heart Study on other chromosomes (FDR < 0.05).

##Plot FDR based results
ggplot(fdrother,aes(x=local.h2,y=global.h2)) + geom_point(cex=0.8) + geom_abline(intercept=1, slope=-1,color='red') + xlab(expression("DGN marginal local h"^2)) + ylab(expression("DGN marginal global h"^2)) + coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05)) + theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"))

Fig2

DGN-WB joint heritability with known trans-eQTLs. Local h2 is estimated with SNPs within 1 Mb of each gene. Known trans h2 is estimated with SNPs that are trans-eQTLs in the Framingham Heart Study for each gene (FDR < 0.05).

transfile<-my.dir %&% 'expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.all.Chr1-22_transForGene.2015-03-20.txt'

fdrtrans<-read.table(transfile,header=T) ##FHS trans-eQTLs for gene w/fdr<0.05 used to define Known trans GRM

##Plot FDR based results
a<-ggplot(fdrtrans,aes(x=loc.jt.h2,y=trans.jt.h2)) + geom_point(cex=0.8) + geom_abline(intercept=1, slope=-1,color='red') + xlab(expression("local h"^2)) + ylab(expression("known trans h"^2)) + coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05)) + theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"))
##plot joint h2 estimates
local <- fdrtrans %>% select(loc.jt.h2,loc.jt.se) %>% arrange(loc.jt.h2) 
names(local) = c('h2','se')
data <- local %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) )
data <- data[complete.cases(data),]
cigt0 <- data$ymin>0
table(cigt0)
## cigt0
## FALSE  TRUE 
##   462   732
ptrue<-round(table(cigt0)/sum(table(cigt0)),3)*100
ptrue
## cigt0
## FALSE  TRUE 
##  38.7  61.3
meanh2<-round(mean(data$h2),3)
meanh2
## [1] 0.133
data <- mutate(data,nonzeroCI=cigt0,position=1:nrow(data))

my_grob = grobTree(textGrob(substitute(paste("mean ", h^2, " = ", m),list(m=meanh2)), x=0.05,  y=0.60, hjust=0,gp=gpar(fontsize=14)))
my_grob2 = grobTree(textGrob(substitute(paste("percent TRUE = ", m),list(m=ptrue[2])), x=0.05,  y=0.66, hjust=0,gp=gpar(fontsize=14)))

b<-ggplot(data,aes(x=position,y=h2,ymin=ymin, ymax=ymax,color=nonzeroCI) ) + geom_pointrange(col='gray')+geom_point()+ylab(expression("local h"^2)) + xlab(expression("genes ordered by local h"^2))+coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-30,1280))+ theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),legend.justification=c(0,1), legend.position=c(0,1))+annotation_custom(my_grob)+annotation_custom(my_grob2)

global <- fdrtrans %>% select(trans.jt.h2,trans.jt.se) %>% arrange(trans.jt.h2) 
names(global) = c('h2','se')
data <- global %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) )
data <- data[complete.cases(data),]
cigt0 <- data$ymin>0
table(cigt0)
## cigt0
## FALSE  TRUE 
##  1144    50
ptrue<-round(table(cigt0)/sum(table(cigt0)),3)*100
ptrue
## cigt0
## FALSE  TRUE 
##  95.8   4.2
meanh2<-round(mean(data$h2),3)
meanh2
## [1] 0.021
data <- mutate(data,nonzeroCI=cigt0,position=1:nrow(data))

my_grob = grobTree(textGrob(substitute(paste("mean ", h^2, " = ", m),list(m=meanh2)), x=0.05,  y=0.60, hjust=0,gp=gpar(fontsize=14)))
my_grob2 = grobTree(textGrob(substitute(paste("percent TRUE = ", m),list(m=ptrue[2])), x=0.05,  y=0.66, hjust=0,gp=gpar(fontsize=14)))

c<-ggplot(data,aes(x=position,y=h2,ymin=ymin, ymax=ymax,color=nonzeroCI) ) + geom_pointrange(col='gray')+geom_point()+ylab(expression("known trans h"^2)) + xlab(expression("genes ordered by known trans h"^2))+coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-30,1280))+ theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),legend.justification=c(0,1), legend.position=c(0,1))+annotation_custom(my_grob)+annotation_custom(my_grob2)

multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)

tiff(filename=fig.dir %&% "Fig2.tiff",width=360,height=960)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
dev.off()
## quartz_off_screen 
##                 2
png(filename=fig.dir %&% "Fig2.png",width=360,height=960)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),c+ggtitle('C')+theme(plot.title=element_text(hjust=0,face="bold")),cols=1)
dev.off()
## quartz_off_screen 
##                 2

Fig3

Polygenic v. sparse by elastic net.

data<-read.table(my.dir %&% 'working_DGN-WB_exp_10-foldCV_1-reps_elasticNet_eachAlphaR2_hapmap2snps_chr22_2015-01-21.txt',header=T,check.names=F)
ngenes<-dim(data)[1]
print("Elastic Net DGN-WB chr22 (" %&% ngenes %&% " genes)")
## [1] "Elastic Net DGN-WB chr22 (341 genes)"
data_long<-melt(data,by=gene)
## Using gene as id variables
a <- ggplot(data_long, aes(x = as.numeric(levels(variable))[variable] , y = value), group=gene) + geom_line(lwd=0.5,show_guide = FALSE,linetype=1) + aes(color = gene) + xlab(expression(paste("elastic net mixing parameter (",alpha, ")"))) + ylab(expression(paste("10-fold cross-validation R"^2))) + theme_gray(base_size = 20) + coord_cartesian(ylim=c(0.3,1),xlim=c(-0.02,1.02))+ geom_point(show_guide = FALSE)
print(a)

data2 <- select(data,gene,2:3,12,22)
gdata <- gather(data2,alpha,R2,2:4)
b<- ggplot(gdata, aes(y = R2 , x = gdata[,2], group=alpha, color=alpha)) + geom_point(show_guide = TRUE) + ylab(expression(paste("alpha R"^2))) + xlab(expression(paste("LASSO (",alpha,"=1) R"^2))) + geom_smooth(method = "lm")+geom_abline(intercept=0, slope=1,color='black')+ coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05)) + theme_gray(base_size = 20) + theme(legend.justification=c(0,1), legend.position=c(0,1))

blandaltman<-ggplot(gdata, aes(x = gdata[,2] , y = gdata[,2]-gdata[,4], group=alpha, color=alpha)) + geom_point(show_guide = TRUE) + ylab(expression(paste("R"^2, " difference (LASSO - alpha)"))) + xlab(expression(paste("R"^2, " (LASSO)"))) + theme_gray(base_size = 20) + theme(legend.justification=c(0,1), legend.position=c(0,1))
blandaltman

###what is the point below zero?
newgdata<-mutate(gdata, diff=`1`-R2) %>% arrange(diff)
head(newgdata)
##       gene            1 alpha         R2         diff
## 1    GTSE1 3.054340e-07     0 0.03915528 -0.039154977
## 2  ARFGAP3 5.344014e-03     0 0.01668370 -0.011339686
## 3    USP18 5.804827e-08     0 0.01030295 -0.010302887
## 4 C22orf40 4.519708e-04     0 0.01048670 -0.010034727
## 5    THAP7 1.908300e-05     0 0.00935239 -0.009333307
## 6    NFAM1 6.489269e-03     0 0.01563154 -0.009142272
filter(newgdata,gene=='GTSE1')
##    gene           1 alpha           R2          diff
## 1 GTSE1 3.05434e-07     0 3.915528e-02 -3.915498e-02
## 2 GTSE1 3.05434e-07  0.05 8.159165e-04 -8.156110e-04
## 3 GTSE1 3.05434e-07   0.5 2.353392e-06 -2.047958e-06
##G-2 And S-Phase Expressed 1, The protein encoded by this gene is only expressed in the S and G2 phases of the cell cycle, where it colocalizes with cytoplasmic tubulin and microtubules. In response to DNA damage, the encoded protein accumulates in the nucleus and binds the tumor suppressor protein p53, shuttling it out of the nucleus and repressing its ability to induce apoptosis.

png(filename=fig.dir %&% "DGNchr22blandAltmanEN.png",width=480,height=480)
blandaltman
dev.off()
## quartz_off_screen 
##                 2
#old fig3
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),b+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),cols=2)

tiff(filename=fig.dir %&% "Fig3.tiff",width=960,height=480)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),blandaltman+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),cols=2)
dev.off()
## quartz_off_screen 
##                 2
png(filename=fig.dir %&% "Fig3.png",width=960,height=480)
multiplot(a+ggtitle('A')+theme(plot.title=element_text(hjust=0,face="bold")),blandaltman+ggtitle('B')+theme(plot.title=element_text(hjust=0,face="bold")),cols=2)
dev.off()
## quartz_off_screen 
##                 2

Fig4

GTEx tissue-wide heritability. Local h2 is estimated with SNPs within 1 Mb of each gene.

h2TW<-read.table(my.dir %&% "GTEx_Tissue-Wide_local_h2.txt",header=T)
seTW<-read.table(my.dir %&% "GTEx_Tissue-Wide_local_se.txt",header=T)

###make ordered point+CI h2 plots
gTW_h2 <- gather(h2TW,"Tissue.h2","TissueWide.h2",2:11)
gTW_se <- gather(seTW,"Tissue.se","TissueWide.se",2:11)
gTW_h2_se <- cbind(gTW_h2,gTW_se[,3])
colnames(gTW_h2_se) <- c("ensid","Tissue","h2","se")
ngenes<-nrow(gTW_h2_se)/length(unique(gTW_h2_se$Tissue))
gTW_h2_se <- gTW_h2_se %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) ) %>% arrange(Tissue,h2) %>% mutate(place=rep(1:ngenes,10),nonzeroCI=ymin>0) 
fig4<-ggplot(gTW_h2_se,aes(x=place,y=h2,ymin=ymin, ymax=ymax, color=nonzeroCI) ) + facet_wrap(~Tissue,ncol=2) + geom_pointrange(col='gray')+geom_point()+theme(legend.justification=c(0,1),legend.position=c(0,1))+ylab(expression("local h"^2)) + xlab(expression("genes ordered by local h"^2))+coord_cartesian(ylim=c(-0.05,1.05))+ theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),strip.text.x = element_text(size = 18),legend.text=element_text(size=14),legend.title=element_text(size=14))

###calc % nonzero for each tissue TISSUE-WIDE
### ADD to plot legend
pvec<-vector()
h2vec<-vector()
a<-gTW_h2_se %>% select(ensid,Tissue,nonzeroCI) %>% spread(Tissue,nonzeroCI)
for(i in 2:11){
  tis<-colnames(a)[i]
  cat("\n\n---",tis,"---\n")
  print(table(a[,i]))
  per <- signif(table(a[,i])/sum(table(a[,i])),3)*100
  print(per)
  pvec <- c(pvec,per[2])
}
## 
## 
## --- CrossTissue ---
## 
## FALSE  TRUE 
## 14069  2938 
## 
## FALSE  TRUE 
##  82.7  17.3 
## 
## 
## --- AdiposeSubcutaneous ---
## 
## FALSE  TRUE 
## 15880  1127 
## 
## FALSE  TRUE 
## 93.40  6.63 
## 
## 
## --- ArteryTibial ---
## 
## FALSE  TRUE 
## 15836  1171 
## 
## FALSE  TRUE 
## 93.10  6.89 
## 
## 
## --- HeartLeftVentricle ---
## 
## FALSE  TRUE 
## 16258   749 
## 
## FALSE  TRUE 
##  95.6   4.4 
## 
## 
## --- Lung ---
## 
## FALSE  TRUE 
## 16078   929 
## 
## FALSE  TRUE 
## 94.50  5.46 
## 
## 
## --- MuscleSkeletal ---
## 
## FALSE  TRUE 
## 15944  1063 
## 
## FALSE  TRUE 
## 93.70  6.25 
## 
## 
## --- NerveTibial ---
## 
## FALSE  TRUE 
## 15541  1466 
## 
## FALSE  TRUE 
## 91.40  8.62 
## 
## 
## --- SkinSunExposedLowerleg ---
## 
## FALSE  TRUE 
## 15809  1198 
## 
## FALSE  TRUE 
## 93.00  7.04 
## 
## 
## --- Thyroid ---
## 
## FALSE  TRUE 
## 15680  1327 
## 
## FALSE  TRUE 
##  92.2   7.8 
## 
## 
## --- WholeBlood ---
## 
## FALSE  TRUE 
## 16062   945 
## 
## FALSE  TRUE 
## 94.40  5.56
###calc mean h2 for each tissue
a<-gTW_h2_se %>% select(ensid,Tissue,h2) %>% spread(Tissue,h2)
for(i in 2:11){
  tis<-colnames(a)[i]
  meanh2 <- signif(mean(a[,i]),3)
  cat(tis,"mean h2:",meanh2,"\n")
  h2vec <- c(h2vec,meanh2)
}
## CrossTissue mean h2: 0.057 
## AdiposeSubcutaneous mean h2: 0.0338 
## ArteryTibial mean h2: 0.0358 
## HeartLeftVentricle mean h2: 0.0383 
## Lung mean h2: 0.0325 
## MuscleSkeletal mean h2: 0.0279 
## NerveTibial mean h2: 0.044 
## SkinSunExposedLowerleg mean h2: 0.0351 
## Thyroid mean h2: 0.0392 
## WholeBlood mean h2: 0.0265
ann_text <- data.frame( h2 = rep(0.78,10), place = rep(5000,10), percent= pvec, mean_h2 = h2vec, Tissue = factor(colnames(a)[2:11]), ymin=rep(0.9,10), ymax=rep(0.9,10), nonzeroCI=rep(NA,10), se=rep(0.9,10))
fig4.2<-fig4+geom_text(data=ann_text,aes(label=paste("mean_h^2 ==",mean_h2,sep="")),color="black",show_guide=F,parse=T,hjust=0,size=5) 
ann_text <- data.frame( h2 = rep(0.9,10), place = rep(5000,10), percent= pvec, mean_h2 = h2vec, Tissue = factor(colnames(a)[2:11]), ymin=rep(0.9,10), ymax=rep(0.9,10), nonzeroCI=rep(NA,10), se=rep(0.9,10))
fig4<-fig4.2+geom_text(data=ann_text,aes(label=paste("percent_TRUE ==",percent,sep="")),color="black",show_guide=F,parse=T,hjust=0,size=5)

fig4

tiff(filename=fig.dir %&% "Fig4.tiff",width=720,height=960)
fig4
dev.off()
## quartz_off_screen 
##                 2
png(filename=fig.dir %&% "Fig4.png",width=720,height=960)
fig4
dev.off()
## quartz_off_screen 
##                 2

Fig5

GTEx cross-tissue and tissue-wide h2 (A) and SE (B).

h2TW<-read.table(my.dir %&% "GTEx_Tissue-Wide_local_h2.txt",header=T)
seTW<-read.table(my.dir %&% "GTEx_Tissue-Wide_local_se.txt",header=T)

gh2_TW<-gather(h2TW,"CrossTissue","Tissue",3:11)
colnames(gh2_TW) <- c('ensid','CrossTissue','TissueName','Tissue')
fig5a<-ggplot(gh2_TW,aes(x=Tissue,y=CrossTissue)) +facet_wrap(~TissueName,scales="fixed",ncol=3) + geom_point(alpha=1/10) + geom_abline(intercept=0, slope=1,color='red')  + ylab(expression('Cross-Tissue h'^2)) + xlab(expression('Tissue-Wide h'^2)) +  ggtitle("A")+ coord_cartesian(ylim=c(-0.1,1.1),xlim=c(-0.1,1.1)) + theme(plot.title = element_text(hjust = 0,face="bold")) 

gse_TW<-gather(seTW,"CrossTissue","Tissue",3:11)
colnames(gse_TW) <- c('ensid','CrossTissue','TissueName','Tissue')
fig5b<-ggplot(gse_TW,aes(x=Tissue,y=CrossTissue)) +facet_wrap(~TissueName,scales="fixed",ncol=3) + geom_point(alpha=1/10) + geom_abline(intercept=0, slope=1,color='red')  + ylab('Cross-Tissue SE') + xlab('Tissue-Wide SE') +  ggtitle("B") + coord_cartesian(ylim=c(-0.01,0.16),xlim=c(-0.01,0.16))+ theme(plot.title = element_text(hjust = 0,face="bold")) 
multiplot(fig5a,fig5b)

tiff(filename=fig.dir %&% "Fig5.tiff",width=480,height=960)
multiplot(fig5a,fig5b)
dev.off()
## quartz_off_screen 
##                 2
png(filename=fig.dir %&% "Fig5.png",width=480,height=960)
multiplot(fig5a,fig5b)
dev.off()
## quartz_off_screen 
##                 2

GTEx tissue-wide elastic net from Nick (not CV R2, he’s re-running)

chr22<-read.table(my.dir %&% "gtex-annot/gencode.v18.genes.patched_contigs.summary.protein.chr22")
gtexEN<-read.table(my.dir %&% "GTEx_tissue-wide_elasticNet_for_ggplot2_2015-05-15.txt",header=T)
chr22EN <- filter(gtexEN,ensid %in% chr22$V5)
ngenes <- length(unique(chr22EN$ensid))
a<-ggplot(chr22EN, aes(x = alpha , y = R2), group=ensid) + facet_wrap(~tissue,ncol=3) + geom_point(show_guide = FALSE) + geom_line(lwd = 0.5, show_guide = FALSE) + aes(color = ensid) + xlab("alpha") + ylab("R2") + ggtitle("GTEx tissue-wide chr22 (" %&% ngenes %&% " genes)")
a

png(filename="GTEx-TW-EN.png")
a
dev.off()
## quartz_off_screen 
##                 2
s_gtexEN<-spread(chr22EN,alpha,R2)
g_gtexEN<-gather(s_gtexEN,alpha,R2,3:5) %>% arrange(tissue)
b<-ggplot(g_gtexEN, aes(x = R2 , y = g_gtexEN[,3], group=alpha, color=alpha)) + facet_wrap(~tissue,ncol=3) + geom_point(show_guide = TRUE) + xlab("alpha R2") + ylab("lasso R2")+ geom_smooth(method = "lm")+geom_abline(intercept=0, slope=1,color='black')+ coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05))+ ggtitle("GTEx tissue-wide chr22 (" %&% ngenes %&% " genes)")
b

png(filename="GTEx-TW-EN_lassoR2_v_alphaR2.png")
b
dev.off()
## quartz_off_screen 
##                 2
ngenesall <- length(unique(gtexEN$ensid))
s_gtexEN<-spread(gtexEN,alpha,R2)
g_gtexEN<-gather(s_gtexEN,alpha,R2,3:5) %>% arrange(tissue)
ggplot(g_gtexEN, aes(x = R2 , y = g_gtexEN[,3], group=alpha, color=alpha)) + facet_wrap(~tissue,ncol=3) + geom_point(show_guide = TRUE) + xlab("alpha R2") + ylab("lasso R2")+ geom_smooth(method = "lm")+geom_abline(intercept=0, slope=1,color='black')+ coord_cartesian(ylim=c(-0.05,1.05),xlim=c(-0.05,1.05))+ ggtitle("GTEx tissue-wide (" %&% ngenesall %&% " genes)")

###GTEx cross-tissue elastic net

cten<-read.table(my.dir %&% 'cross-tissue_exp_10-foldCV_elasticNet_R2_for_ggplot2.txt',header=T,check.names=F)
ngenesall <- length(unique(cten$gene))
g_cten<-gather(cten,alpha,R2,3:5)
a<-ggplot(g_cten, aes(y = `1` - R2, x = `1`, group=alpha, color=alpha)) + geom_point(show_guide = TRUE) + ylab(expression(paste("R"^2, " difference (LASSO - alpha)"))) + xlab(expression(paste("R"^2, " (LASSO)"))) +ggtitle("GTEx cross-tissue (" %&% ngenesall %&% " genes)")+theme_gray(18)
a
## Warning in loop_apply(n, do.ply): Removed 10623 rows containing missing
## values (geom_point).

png(filename="GTEx-cross-tissue-EN_lassoR2_v_alphaR2.png")
a
## Warning in loop_apply(n, do.ply): Removed 10623 rows containing missing
## values (geom_point).
dev.off()
## quartz_off_screen 
##                 2
###what are points below zero?
new_cten<-mutate(g_cten, diff=`1`-R2) %>% arrange(diff)
head(new_cten,n=10L)
##       gene cross-tissue           1 alpha         R2        diff
## 1   RBFADN cross-tissue 0.183801495  0.05 0.25759279 -0.07379129
## 2  GRAMD1B cross-tissue 0.004506654  0.05 0.05309162 -0.04858496
## 3   KCTD18 cross-tissue 0.003009104  0.05 0.05102096 -0.04801185
## 4  FAM101B cross-tissue 0.137451577  0.05 0.17350533 -0.03605376
## 5    WDR81 cross-tissue 0.045779349  0.05 0.08066177 -0.03488242
## 6     FICD cross-tissue 0.005234580  0.05 0.04010692 -0.03487234
## 7     FHIT cross-tissue 0.038865715  0.05 0.07169432 -0.03282860
## 8    NEIL2 cross-tissue 0.192899836  0.05 0.22568531 -0.03278547
## 9    CAPN8 cross-tissue 0.266590755  0.05 0.29804730 -0.03145654
## 10   RPS23 cross-tissue 0.091904576  0.05 0.12333971 -0.03143513
filter(new_cten,gene=='RBFADN')
##     gene cross-tissue         1 alpha        R2          diff
## 1 RBFADN cross-tissue 0.1838015  0.05 0.2575928 -0.0737912903
## 2 RBFADN cross-tissue 0.1838015   0.5 0.1973535 -0.0135520213
## 3 RBFADN cross-tissue 0.1838015  0.95 0.1841779 -0.0003764431

FigS1

GTEx tissue-specific heritability. Local h2 is estimated with SNPs within 1 Mb of each gene.

h2TS<-read.table(my.dir %&% "GTEx_Tissue-Specific_local_h2.txt",header=T)
seTS<-read.table(my.dir %&% "GTEx_Tissue-Specific_local_se.txt",header=T)

###make ordered point+CI h2 plots
gTS_h2 <- gather(h2TS,"Tissue.h2","TissueSpecific.h2",2:11)
gTS_se <- gather(seTS,"Tissue.se","TissueSpecific.se",2:11)
gTS_h2_se <- cbind(gTS_h2,gTS_se[,3])
colnames(gTS_h2_se) <- c("ensid","Tissue","h2","se")
ngenes<-nrow(gTS_h2_se)/length(unique(gTS_h2_se$Tissue))
gTS_h2_se <- gTS_h2_se %>% mutate(ymin = pmax(0, h2 - 2 * se), ymax = pmin(1, h2 + 2 * se) ) %>% arrange(Tissue,h2) %>% mutate(place=rep(1:ngenes,10),nonzeroCI=ymin>0) 
figS1<-ggplot(gTS_h2_se,aes(x=place,y=h2,ymin=ymin, ymax=ymax, color=nonzeroCI) ) + facet_wrap(~Tissue,ncol=2) + geom_pointrange(col='gray')+geom_point()+ theme(legend.justification=c(0,1), legend.position=c(0,1))+ylab(expression("local h"^2)) + xlab(expression("genes ordered by local h"^2))+ theme(axis.text=element_text(size=16),axis.title=element_text(size=18,face="bold"),strip.text.x = element_text(size = 18),legend.text=element_text(size=14),legend.title=element_text(size=14))+ coord_cartesian(ylim=c(0,1))

###calc % nonzero for each tissue TISSUE-WIDE
### ADD to plot legend
pvec<-vector()
h2vec<-vector()
a<-gTS_h2_se %>% select(ensid,Tissue,nonzeroCI) %>% spread(Tissue,nonzeroCI)
for(i in 2:11){
  tis<-colnames(a)[i]
  cat("\n\n---",tis,"---\n")
  print(table(a[,i]))
  per <- signif(table(a[,i])/sum(table(a[,i])),3)*100
  print(per)
  pvec <- c(pvec,per[2])
}
## 
## 
## --- CrossTissue ---
## 
## FALSE  TRUE 
## 14069  2938 
## 
## FALSE  TRUE 
##  82.7  17.3 
## 
## 
## --- AdiposeSubcutaneous ---
## 
## FALSE  TRUE 
## 16800   207 
## 
## FALSE  TRUE 
## 98.80  1.22 
## 
## 
## --- ArteryTibial ---
## 
## FALSE  TRUE 
## 16701   306 
## 
## FALSE  TRUE 
##  98.2   1.8 
## 
## 
## --- HeartLeftVentricle ---
## 
## FALSE  TRUE 
## 16709   298 
## 
## FALSE  TRUE 
## 98.20  1.75 
## 
## 
## --- Lung ---
## 
## FALSE  TRUE 
## 16784   223 
## 
## FALSE  TRUE 
## 98.70  1.31 
## 
## 
## --- MuscleSkeletal ---
## 
## FALSE  TRUE 
## 16573   434 
## 
## FALSE  TRUE 
## 97.40  2.55 
## 
## 
## --- NerveTibial ---
## 
## FALSE  TRUE 
## 16628   379 
## 
## FALSE  TRUE 
## 97.80  2.23 
## 
## 
## --- SkinSunExposedLowerleg ---
## 
## FALSE  TRUE 
## 16558   449 
## 
## FALSE  TRUE 
## 97.40  2.64 
## 
## 
## --- Thyroid ---
## 
## FALSE  TRUE 
## 16565   442 
## 
## FALSE  TRUE 
##  97.4   2.6 
## 
## 
## --- WholeBlood ---
## 
## FALSE  TRUE 
## 16517   490 
## 
## FALSE  TRUE 
## 97.10  2.88
###calc mean h2 for each tissue
a<-gTS_h2_se %>% select(ensid,Tissue,h2) %>% spread(Tissue,h2)
for(i in 2:11){
  tis<-colnames(a)[i]
  meanh2 <- signif(mean(a[,i]),3)
  cat(tis,"mean h2:",meanh2,"\n")
  h2vec <- c(h2vec,meanh2)
}
## CrossTissue mean h2: 0.057 
## AdiposeSubcutaneous mean h2: 0.0204 
## ArteryTibial mean h2: 0.0249 
## HeartLeftVentricle mean h2: 0.0323 
## Lung mean h2: 0.0223 
## MuscleSkeletal mean h2: 0.0214 
## NerveTibial mean h2: 0.0281 
## SkinSunExposedLowerleg mean h2: 0.0288 
## Thyroid mean h2: 0.0265 
## WholeBlood mean h2: 0.0261
ann_text <- data.frame( h2 = rep(0.78,10), place = rep(5000,10), percent= pvec, mean_h2 = h2vec, Tissue = factor(colnames(a)[2:11]), ymin=rep(0.9,10), ymax=rep(0.9,10), nonzeroCI=rep(NA,10), se=rep(0.9,10))
figS1.2<-figS1+geom_text(data=ann_text,aes(label=paste("mean_h^2 ==",mean_h2,sep="")),color="black",show_guide=F,parse=T,hjust=0,size=5) 
ann_text <- data.frame( h2 = rep(0.9,10), place = rep(5000,10), percent= pvec, mean_h2 = h2vec, Tissue = factor(colnames(a)[2:11]), ymin=rep(0.9,10), ymax=rep(0.9,10), nonzeroCI=rep(NA,10), se=rep(0.9,10))
figS1<-figS1.2+geom_text(data=ann_text,aes(label=paste("percent_TRUE ==",percent,sep="")),color="black",show_guide=F,parse=T,hjust=0,size=5)

figS1

tiff(filename=fig.dir %&% "FigS1.tiff",width=720,height=960)
figS1
dev.off()
## quartz_off_screen 
##                 2
png(filename=fig.dir %&% "FigS1.png",width=720,height=960)
figS1
dev.off()
## quartz_off_screen 
##                 2

FigS2

GTEx cross-tissue and tissue-specific h2 (A) and SE (B).

h2TS<-read.table(my.dir %&% "GTEx_Tissue-Specific_local_h2.txt",header=T)
seTS<-read.table(my.dir %&% "GTEx_Tissue-Specific_local_se.txt",header=T)

gh2_TS<-gather(h2TS,"CrossTissue","Tissue",3:11)
colnames(gh2_TS) <- c('ensid','CrossTissue','TissueName','Tissue')
figS2a<-ggplot(gh2_TS,aes(x=Tissue,y=CrossTissue)) +facet_wrap(~TissueName,scales="fixed",ncol=3) + geom_point(alpha=1/10) + geom_abline(intercept=0, slope=1,color='red')  + ylab(expression('Cross-Tissue h'^2)) + xlab(expression('Tissue-Specific h'^2)) +  ggtitle("A")+ coord_cartesian(ylim=c(-0.1,1.1),xlim=c(-0.1,1.1)) + theme(plot.title = element_text(hjust = 0,face="bold"))

gse_TS<-gather(seTS,"CrossTissue","Tissue",3:11)
colnames(gse_TS) <- c('ensid','CrossTissue','TissueName','Tissue')
figS2b<-ggplot(gse_TS,aes(x=Tissue,y=CrossTissue)) +facet_wrap(~TissueName,scales="fixed",ncol=3) + geom_point(alpha=1/10) + geom_abline(intercept=0, slope=1,color='red') + ylab('Cross-Tissue SE') + xlab('Tissue-Specific SE') +  ggtitle("B") + coord_cartesian(ylim=c(-0.01,0.16),xlim=c(-0.01,0.16))+ theme(plot.title = element_text(hjust = 0,face="bold")) 
multiplot(figS2a,figS2b)

tiff(filename=fig.dir %&% "FigS2.tiff",width=480,height=960)
multiplot(figS2a,figS2b)
dev.off()
## quartz_off_screen 
##                 2
png(filename=fig.dir %&% "FigS2.png",width=480,height=960)
multiplot(figS2a,figS2b)
dev.off()
## quartz_off_screen 
##                 2

FigSx h2 v. exp level

[[ADD h2 vs. variance]]

h2 <- read.table('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan_CV/cis.v.trans.prediction/DGN-WB.localGRM.h2.exp.2014-08-30.txt',header=T)
exp <- read.table('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan_CV/cis.v.trans.prediction/DGN-WB.mean.median.expression.2015-02-23.txt',header=T)
all <- inner_join(h2,exp,by='gene')
ngenes<-dim(all)[[1]]
fig<-ggplot(all,aes(x=expmeans,y=h2)) + geom_point() + xlab("Mean expression") + ylab(expression("Local h"^2)) + ggtitle("DGN-WB (" %&% ngenes %&% " genes)") + theme_bw(20)
fig

tiff(filename=fig.dir %&% "FigSx-h2_v_exp.tiff",width=480,height=480)
fig
dev.off()
## quartz_off_screen 
##                 2
png(filename=fig.dir %&% "FigSx-h2_v_exp.png",width=480,height=480)
fig
dev.off()
## quartz_off_screen 
##                 2